Load the required packages

library(ggplot2)
library(ezids)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(tidyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(knitr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract

Read data

Read the .csv files of adult arrests in the DC area for 2016-2021.

# Load one arrests table per year (2016-2021) from CSV files in the working directory.
df_2016 <- data.frame(read.csv(file = "Arrests 2016 Public.csv"))
df_2017 <- data.frame(read.csv(file = "Arrests 2017 Public.csv"))
df_2018 <- data.frame(read.csv(file = "Arrests by Year, 2018.csv"))
df_2019 <- data.frame(read.csv(file = "Arrests by Year, 2019.csv"))
df_2020 <- data.frame(read.csv(file = "Arrests by Year 2020.csv"))
df_2021 <- data.frame(read.csv(file = "2021 Adult Arrests.csv"))

Some data cleaning: dropping, binding, and renaming columns as needed.

# Stack the six yearly tables into a single data frame, df_full.
# The 2016 and 2017 files list their columns differently from 2018-2021, so
# 2016 first takes the 2017 column names and the pair is bound separately.
names(df_2016) <- names(df_2017)

# Column positions that are not needed for this analysis
# (the district location columns are kept for now).
unneeded_cols <- c(5, 6, 15, 17:21, 23:26)

df_16_17 <- rbind(df_2016, df_2017)
df_16_17 <- df_16_17[-unneeded_cols]
df_16_17 <- rename(df_16_17, "Offense.Location.District" = "Offense.District")
# Swap dots for underscores in the column names for readability.
names(df_16_17) <- gsub("[.]", "_", names(df_16_17))

# 2018-2021 share one column layout, so bind them and drop the same positions.
df_18_21 <- rbind(df_2018, df_2019, df_2020, df_2021)
df_18_21 <- df_18_21[, -unneeded_cols]
names(df_18_21) <- gsub("[.]", "_", names(df_18_21))

# Combine both halves into one table and list the resulting columns.
df_full <- rbind(df_16_17, df_18_21)
colnames(df_full)
##  [1] "Arrestee_Type"             "Arrest_Year"              
##  [3] "Arrest_Date"               "Arrest_Hour"              
##  [5] "Age"                       "Defendant_PSA"            
##  [7] "Defendant_District"        "Defendant_Race"           
##  [9] "Defendant_Ethnicity"       "Defendant_Sex"            
## [11] "Arrest_Category"           "Charge_Description"       
## [13] "Arrest_Location_District"  "Offense_Location_District"
# Harmonise category labels. Inspect the raw values first if needed with
# unique(df_full$Defendant_Race), unique(df_full$Defendant_Sex) and
# sort(unique(df_full$Arrest_Category)).

# "UNK" is treated as the same value as "UNKNOWN" for race ...
df_full$Defendant_Race <- replace(
  df_full$Defendant_Race,
  df_full$Defendant_Race == "UNK",
  "UNKNOWN"
)

# ... and likewise for sex.
df_full$Defendant_Sex <- replace(
  df_full$Defendant_Sex,
  df_full$Defendant_Sex == "UNK",
  "UNKNOWN"
)

# Arrest category: the several "Fraud and Financial Crimes" variants and the
# several "Release Violations/Fugitive" variants are each collapsed to one label
# by cutting off everything after the common prefix.
df_full$Arrest_Category <- gsub(
  pattern = "Fraud and Financial Crimes.*",
  replacement = "Fraud and Financial Crimes",
  x = df_full$Arrest_Category
)
df_full$Arrest_Category <- gsub(
  pattern = "Release Violations/Fugitive.*",
  replacement = "Release Violations/Fugitive",
  x = df_full$Arrest_Category
)

Missing Values

# Number of NA entries in each column of df_full.
vapply(df_full, function(column) sum(is.na(column)), integer(1))
##             Arrestee_Type               Arrest_Year               Arrest_Date 
##                         0                         0                         0 
##               Arrest_Hour                       Age             Defendant_PSA 
##                         0                         0                     29143 
##        Defendant_District            Defendant_Race       Defendant_Ethnicity 
##                      9349                         0                         0 
##             Defendant_Sex           Arrest_Category        Charge_Description 
##                         0                        12                        15 
##  Arrest_Location_District Offense_Location_District 
##                       185                        11
Our variables of concern in this dataset contain rich data: over 95% of the data is available in each useful column, so we will not delete any rows.
# Derive Month and Day columns from Arrest_Date, which is kept (remove = FALSE).
# The leading year piece is omitted with NA in `into` because Arrest_Year
# already holds it; this avoids dropping a column by position afterwards.
df_full <- separate(
  df_full,
  col = Arrest_Date,
  into = c(NA, "Month", "Day"),
  sep = "-",
  remove = FALSE,
  fill = "left"
)
colnames(df_full)
##  [1] "Arrestee_Type"             "Arrest_Year"              
##  [3] "Arrest_Date"               "Month"                    
##  [5] "Day"                       "Arrest_Hour"              
##  [7] "Age"                       "Defendant_PSA"            
##  [9] "Defendant_District"        "Defendant_Race"           
## [11] "Defendant_Ethnicity"       "Defendant_Sex"            
## [13] "Arrest_Category"           "Charge_Description"       
## [15] "Arrest_Location_District"  "Offense_Location_District"
# Convert the categorical columns to factors.
# Day is not included: it is set below from the parsed date as a day-of-month
# number, so a factor conversion here would simply be overwritten.
factor_cols <- c(
  "Arrest_Year", "Month", "Defendant_Race", "Defendant_Sex",
  "Arrest_Location_District", "Offense_Location_District", "Arrest_Category"
)
df_full[factor_cols] <- lapply(df_full[factor_cols], as.factor)

# Parse the arrest date text into Date objects.
df_full$Arrest_Date <- as.Date(df_full$Arrest_Date)

# Day of the month taken from the parsed date.
df_full$Day <- day(df_full$Arrest_Date)

# Weekday name as a factor whose levels run Monday through Sunday; the level
# labels come from lubridate::wday() so they match the names weekdays() returns.
df_full$Weekday <- factor(
  weekdays(df_full$Arrest_Date),
  levels = as.character(wday(c(2:7, 1), label = TRUE, abbr = FALSE))
)

EDA (?)

Daily Crime Trend (2016-2021)

All Crimes By Hour, Day, Month, Year

Hour of the day.. at what time do these crimes occur the most?

# Count arrests for each hour of the day.
by_hour <- dplyr::summarise(group_by(df_full, Arrest_Hour), Total = n())
by_hour
## # A tibble: 24 × 2
##    Arrest_Hour Total
##          <int> <int>
##  1           0  5684
##  2           1  7431
##  3           2  6774
##  4           3  6288
##  5           4  5427
##  6           5  4416
##  7           6  4469
##  8           7  6101
##  9           8  6731
## 10           9  6594
## # … with 14 more rows
# Line chart of arrest counts across the 24 hours, coloured by hour.
ggplot(by_hour, aes(x = Arrest_Hour, y = Total, color = Arrest_Hour)) +
  geom_line() +
  labs(title = "Crimes By Hour", x = "Hour of the Day", y = "Total Crimes")

Day .. What day of the month has the highest crime incidents?

# Count arrests for each day of the month.
by_day <- dplyr::summarise(group_by(df_full, Day), Total = n())
by_day
## # A tibble: 31 × 2
##      Day Total
##    <int> <int>
##  1     1  5518
##  2     2  5195
##  3     3  5242
##  4     4  5138
##  5     5  5192
##  6     6  5000
##  7     7  4880
##  8     8  5027
##  9     9  4949
## 10    10  5091
## # … with 21 more rows
# Line chart of arrest counts by day of the month.
ggplot(by_day, aes(x = Day, y = Total, color = Day)) +
  geom_line() +
  labs(title = "Crimes By Day", x = "Day of the Month", y = "Total Crimes")

Day of the week..

# Arrest counts per weekday, plus each weekday's share of all rows in df_full.
by_weekday <- df_full %>%
  group_by(Weekday) %>%
  dplyr::summarise(Total = n()) %>%
  mutate(Percent = Total / nrow(df_full) * 100)
by_weekday
## # A tibble: 7 × 3
##   Weekday   Total Percent
##   <fct>     <int>   <dbl>
## 1 Monday    19560    12.8
## 2 Tuesday   21432    14.1
## 3 Wednesday 23529    15.4
## 4 Thursday  23246    15.3
## 5 Friday    23198    15.2
## 6 Saturday  22148    14.5
## 7 Sunday    19273    12.6
# Bar chart of arrest counts per weekday; the fill legend is hidden because
# the x axis already names each bar.
ggplot(by_weekday, aes(x = Weekday, y = Total, fill = Weekday)) +
  geom_col() +
  labs(title = "Crimes By Weekday ", x = "Day of the Week", y = "Count") +
  theme(legend.position = "none")

Monthly crime incidence

# Arrest counts per calendar month, plus each month's share of all rows.
by_month <- df_full %>%
  group_by(Month) %>%
  dplyr::summarise(Total = n()) %>%
  mutate(Percent = Total / nrow(df_full) * 100)
by_month
## # A tibble: 12 × 3
##    Month Total Percent
##    <fct> <int>   <dbl>
##  1 01    12757    8.37
##  2 02    12163    7.98
##  3 03    13631    8.95
##  4 04    12351    8.11
##  5 05    13430    8.81
##  6 06    12729    8.35
##  7 07    13011    8.54
##  8 08    12997    8.53
##  9 09    12583    8.26
## 10 10    13037    8.56
## 11 11    11871    7.79
## 12 12    11826    7.76
# Bar chart of arrest counts per month (legend hidden; bars are labelled on the axis).
ggplot(by_month, aes(x = Month, y = Total, fill = Month)) +
  geom_col() +
  labs(title = "Crimes By Month", x = "Month", y = "Count") +
  theme(legend.position = "none")

# Same monthly totals as a single connected line (group = 1 joins the factor levels).
ggplot(by_month) +
  geom_line(aes(x = Month, y = Total, group = 1))

#### Crime incidence grouped into yearly plots

# Arrest counts per year, plus each year's share of all rows in df_full.
by_year <- df_full %>%
  group_by(Arrest_Year) %>%
  dplyr::summarise(Total = n()) %>%
  mutate(Percent = Total / nrow(df_full) * 100)
by_year
## # A tibble: 6 × 3
##   Arrest_Year Total Percent
##   <fct>       <int>   <dbl>
## 1 2016        29980    19.7
## 2 2017        31209    20.5
## 3 2018        29115    19.1
## 4 2019        27938    18.3
## 5 2020        18491    12.1
## 6 2021        15653    10.3
# Bar chart of arrest counts per year (legend hidden; bars are labelled on the axis).
ggplot(by_year, aes(x = Arrest_Year, y = Total, fill = Arrest_Year)) +
  geom_col() +
  labs(title = "Crimes By Year ", x = "Year", y = "Count") +
  theme(legend.position = "none")

# Same yearly totals as a single connected line.
ggplot(by_year) +
  geom_line(aes(x = Arrest_Year, y = Total, group = 1))

Time to investigate our main focus group - White Males - EDA

# Keep only rows recorded as race WHITE and sex MALE.
# (unique(df_full$Defendant_Race) / table(df_full$Defendant_Sex) list the available values.)
is_white_male <- df_full$Defendant_Race == "WHITE" & df_full$Defendant_Sex == "MALE"
# which() discards NA comparisons, so rows with a missing race or sex are excluded.
df_wm <- df_full[which(is_white_male), ]
head(df_wm, n = 20)
##     Arrestee_Type Arrest_Year Arrest_Date Month Day Arrest_Hour Age
## 1    Adult Arrest        2016  2016-01-01    01   1           0  39
## 2    Adult Arrest        2016  2016-01-01    01   1           0  27
## 12   Adult Arrest        2016  2016-01-01    01   1           1  27
## 14   Adult Arrest        2016  2016-01-01    01   1           1  26
## 24   Adult Arrest        2016  2016-01-01    01   1          13  48
## 54   Adult Arrest        2016  2016-01-01    01   1           2  25
## 76   Adult Arrest        2016  2016-01-01    01   1           3  21
## 84   Adult Arrest        2016  2016-01-01    01   1           3  41
## 96   Adult Arrest        2016  2016-01-01    01   1           6  29
## 98   Adult Arrest        2016  2016-01-01    01   1           7  22
## 104  Adult Arrest        2016  2016-01-02    01   2           0  51
## 110  Adult Arrest        2016  2016-01-02    01   2           1  29
## 114  Adult Arrest        2016  2016-01-02    01   2          11  64
## 123  Adult Arrest        2016  2016-01-02    01   2          15  33
## 131  Adult Arrest        2016  2016-01-02    01   2          16  23
## 138  Adult Arrest        2016  2016-01-02    01   2          17  49
## 161  Adult Arrest        2016  2016-01-02    01   2          21  30
## 171  Adult Arrest        2016  2016-01-02    01   2           3  22
## 175  Adult Arrest        2016  2016-01-02    01   2           4  28
## 194  Adult Arrest        2016  2016-01-03    01   3          15  27
##     Defendant_PSA Defendant_District Defendant_Race Defendant_Ethnicity
## 1    Out of State       Out of State          WHITE             UNKNOWN
## 2    Out of State       Out of State          WHITE        NOT HISPANIC
## 12   Out of State       Out of State          WHITE            HISPANIC
## 14   Out of State       Out of State          WHITE        NOT HISPANIC
## 24            404                 4D          WHITE        NOT HISPANIC
## 54   Out of State       Out of State          WHITE            HISPANIC
## 76   Out of State       Out of State          WHITE            HISPANIC
## 84            307                 3D          WHITE            HISPANIC
## 96   Out of State       Out of State          WHITE            HISPANIC
## 98            402                 4D          WHITE            HISPANIC
## 104  Out of State       Out of State          WHITE        NOT HISPANIC
## 110  Out of State       Out of State          WHITE            HISPANIC
## 114  Out of State       Out of State          WHITE             UNKNOWN
## 123           302                 3D          WHITE            HISPANIC
## 131           506                 5D          WHITE            HISPANIC
## 138  Out of State       Out of State          WHITE            HISPANIC
## 161  Out of State       Out of State          WHITE        NOT HISPANIC
## 171  Out of State       Out of State          WHITE            HISPANIC
## 175           201                 2D          WHITE        NOT HISPANIC
## 194           403                 4D          WHITE            HISPANIC
##     Defendant_Sex                   Arrest_Category
## 1            MALE                    Simple Assault
## 2            MALE                    Simple Assault
## 12           MALE Driving/Boating While Intoxicated
## 14           MALE                    Simple Assault
## 24           MALE                    Simple Assault
## 54           MALE                    Simple Assault
## 76           MALE             Liquor Law Violations
## 84           MALE Driving/Boating While Intoxicated
## 96           MALE                    Simple Assault
## 98           MALE                    Simple Assault
## 104          MALE       Release Violations/Fugitive
## 110          MALE                Traffic Violations
## 114          MALE                    Simple Assault
## 123          MALE   Assault with a Dangerous Weapon
## 131          MALE                 Weapon Violations
## 138          MALE             Liquor Law Violations
## 161          MALE                         Narcotics
## 171          MALE                    Simple Assault
## 175          MALE                Damage to Property
## 194          MALE                Traffic Violations
##                                        Charge_Description
## 1                         Threats To Do Bodily Harm -misd
## 2                                          Simple Assault
## 12                     Driving While Intoxicated -2nd Off
## 14                                         Simple Assault
## 24                                         Simple Assault
## 54                                         Simple Assault
## 76  Poss Of Open Container Of Alcohol/public Intoxication
## 84                       Driving Under Influence -2nd Off
## 96                                         Simple Assault
## 98                                         Simple Assault
## 104                              Failure To Appear (USAO)
## 110                                             No Permit
## 114                                        Simple Assault
## 123                       Assault With A Dangerous Weapon
## 131                             Possess Prohibited Weapon
## 138     Possession Of An Open Container Of Alcohol (poca)
## 161               Poss W/i To Dist A Controlled Substance
## 171                                        Simple Assault
## 175               Destruction Of Property Less Than $1000
## 194                                             No Permit
##     Arrest_Location_District Offense_Location_District  Weekday
## 1                         2D                        2D   Friday
## 2                         3D                        3D   Friday
## 12                        4D                        4D   Friday
## 14                        5D                        5D   Friday
## 24                        1D                        1D   Friday
## 54                        3D                        3D   Friday
## 76                        2D                        2D   Friday
## 84                        2D                        2D   Friday
## 96                        2D                        2D   Friday
## 98                        4D                        4D   Friday
## 104                       4D                        1D Saturday
## 110                       4D                        4D Saturday
## 114                       2D                        2D Saturday
## 123                       4D                        4D Saturday
## 131                       5D                        5D Saturday
## 138                       3D                        3D Saturday
## 161                       1D                        1D Saturday
## 171                       2D                        2D Saturday
## 175                       3D                        3D Saturday
## 194                       4D                        4D   Sunday
# Build the Weekday factor on the subset from Arrest_Date, with levels ordered
# Monday through Sunday.
df_wm$Weekday <- factor(
  weekdays(df_wm$Arrest_Date),
  levels = as.character(wday(c(2:7, 1), label = TRUE, abbr = FALSE))
)

The same pattern appears here as above; we will dig into a few other things too.

White Men - By the Hour

# Count White-male arrests for each hour of the day.
wm_by_hour <- dplyr::summarise(group_by(df_wm, Arrest_Hour), Total = n())
wm_by_hour
## # A tibble: 24 × 2
##    Arrest_Hour Total
##          <int> <int>
##  1           0   606
##  2           1   801
##  3           2   813
##  4           3   695
##  5           4   552
##  6           5   329
##  7           6   324
##  8           7   405
##  9           8   424
## 10           9   442
## # … with 14 more rows
# Line chart of White-male arrest counts across the 24 hours.
ggplot(wm_by_hour, aes(x = Arrest_Hour, y = Total, color = Arrest_Hour)) +
  geom_line() +
  labs(
    title = "White Males - Crimes By Hour",
    x = "Hour of the Day",
    y = "Total Crimes"
  )

White Men - What day of the month has the highest crime incidents?

# Count White-male arrests for each day of the month.
wm_by_day <- dplyr::summarise(group_by(df_wm, Day), Total = n())
wm_by_day
## # A tibble: 31 × 2
##      Day Total
##    <int> <int>
##  1     1   524
##  2     2   404
##  3     3   403
##  4     4   395
##  5     5   405
##  6     6   456
##  7     7   406
##  8     8   377
##  9     9   399
## 10    10   430
## # … with 21 more rows
# Line chart of White-male arrest counts by day of the month.
ggplot(wm_by_day, aes(x = Day, y = Total, color = Day)) +
  geom_line() +
  labs(
    title = "White Males - Crimes By Day",
    x = "Day of the Month",
    y = "Total Crimes"
  )

White Men - What Day of the Week has the highest crime incidents?

# White-male arrest counts per weekday, plus each weekday's share of the subset.
wm_by_weekday <- df_wm %>%
  group_by(Weekday) %>%
  dplyr::summarise(Total = n()) %>%
  mutate(Percent = Total / nrow(df_wm) * 100)
wm_by_weekday
## # A tibble: 7 × 3
##   Weekday   Total Percent
##   <fct>     <int>   <dbl>
## 1 Monday     1566    12.7
## 2 Tuesday    1465    11.9
## 3 Wednesday  1640    13.4
## 4 Thursday   1736    14.1
## 5 Friday     1877    15.3
## 6 Saturday   2046    16.7
## 7 Sunday     1953    15.9
# Bar chart of White-male arrest counts per weekday (legend hidden).
ggplot(wm_by_weekday, aes(x = Weekday, y = Total, fill = Weekday)) +
  geom_col() +
  labs(
    title = "White Males - Crimes By Weekday ",
    x = "Day of the Week",
    y = "Count"
  ) +
  theme(legend.position = "none")

White Males - By Month

# White-male arrest counts per calendar month, plus each month's share of the subset.
wm_by_month <- df_wm %>%
  group_by(Month) %>%
  dplyr::summarise(Total = n()) %>%
  mutate(Percent = Total / nrow(df_wm) * 100)
wm_by_month
## # A tibble: 12 × 3
##    Month Total Percent
##    <fct> <int>   <dbl>
##  1 01     1191    9.70
##  2 02      998    8.13
##  3 03     1125    9.16
##  4 04      922    7.51
##  5 05     1059    8.62
##  6 06     1010    8.22
##  7 07      966    7.86
##  8 08      962    7.83
##  9 09     1023    8.33
## 10 10     1108    9.02
## 11 11      991    8.07
## 12 12      928    7.56
# Bar chart of White-male arrest counts per month (legend hidden).
ggplot(wm_by_month, aes(x = Month, y = Total, fill = Month)) +
  geom_col() +
  labs(title = "White Males - Crimes By Month", x = "Month", y = "Count") +
  theme(legend.position = "none")

# Same monthly totals as a single connected line.
ggplot(wm_by_month) +
  geom_line(aes(x = Month, y = Total, group = 1))

#### White Men - Yearly Crime Incidents

# White-male arrest counts per year, plus each year's share of the subset.
wm_by_year <- df_wm %>%
  group_by(Arrest_Year) %>%
  dplyr::summarise(Total = n()) %>%
  mutate(Percent = Total / nrow(df_wm) * 100)
wm_by_year
## # A tibble: 6 × 3
##   Arrest_Year Total Percent
##   <fct>       <int>   <dbl>
## 1 2016         2620   21.3 
## 2 2017         2636   21.5 
## 3 2018         2298   18.7 
## 4 2019         2193   17.9 
## 5 2020         1426   11.6 
## 6 2021         1110    9.04
# Bar chart of White-male arrest counts per year (legend hidden).
ggplot(wm_by_year, aes(x = Arrest_Year, y = Total, fill = Arrest_Year)) +
  geom_col() +
  labs(title = "White Males - Crimes By Year ", x = "Year", y = "Count") +
  theme(legend.position = "none")

# Same yearly totals as a single connected line.
ggplot(wm_by_year) +
  geom_line(aes(x = Arrest_Year, y = Total, group = 1))

Let us get into crime types…

# White-male arrest counts per arrest category, most frequent first.
wm_cat_counts <- dplyr::summarise(group_by(df_wm, Arrest_Category), Total = n())
wm_by_cat <- arrange(wm_cat_counts, desc(Total))

# Show the ten most frequent categories.
wm_by_cat[1:10, ]
## # A tibble: 10 × 2
##    Arrest_Category                   Total
##    <fct>                             <int>
##  1 Simple Assault                     2661
##  2 Traffic Violations                 1549
##  3 Release Violations/Fugitive        1133
##  4 Driving/Boating While Intoxicated  1045
##  5 Other Crimes                        822
##  6 Theft                               675
##  7 Narcotics                           654
##  8 Liquor Law Violations               562
##  9 Disorderly Conduct                  433
## 10 Damage to Property                  414
# Horizontal bar chart of arrest counts per category, ordered by count.
ggplot(wm_by_cat, aes(x = reorder(Arrest_Category, Total), y = Total)) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(breaks = seq(0, 3000, 500)) +
  labs(
    title = "Crimes By Arrest Category",
    x = "Crime Type",
    y = "Number of Incidents"
  )

# Count White-male arrests for every (year, category) pair; the result stays
# grouped by Arrest_Year, as the message below reports.
wm_by_cat_year <- dplyr::summarise(
  group_by(df_wm, Arrest_Year, Arrest_Category),
  Total = n()
)
## `summarise()` has grouped output by 'Arrest_Year'. You can override using the
## `.groups` argument.
# Preview the first ten rows.
wm_by_cat_year[1:10, ]
## # A tibble: 10 × 3
## # Groups:   Arrest_Year [1]
##    Arrest_Year Arrest_Category                   Total
##    <fct>       <fct>                             <int>
##  1 2016        Aggravated Assault                   23
##  2 2016        Assault on a Police Officer          42
##  3 2016        Assault with a Dangerous Weapon      73
##  4 2016        Burglary                             25
##  5 2016        Damage to Property                   98
##  6 2016        Disorderly Conduct                   83
##  7 2016        Driving/Boating While Intoxicated   206
##  8 2016        Fraud and Financial Crimes           11
##  9 2016        Homicide                              2
## 10 2016        Kidnapping                            4
# Horizontal stacked bars: one bar per arrest category, split by year.
# wm_by_cat_year has one row per (year, category), so reorder() must aggregate
# with sum to order the bars by their stacked height; its default (mean) would
# order them by the average yearly count instead.
ggplot(
  wm_by_cat_year,
  aes(reorder(Arrest_Category, Total, FUN = sum), Total, fill = Arrest_Year)
) +
  geom_bar(stat = "identity") +
  scale_y_continuous(breaks = seq(0, 3000, 500)) +
  coord_flip() +
  ggtitle("White Males - Crimes By Arrest Category and Year") +
  xlab("Crime Type") +
  ylab("Total Crimes")

Some Top Crimes

Location Stuff

# Distinct arrest-location district values in the White-male subset.
unique(df_wm[["Arrest_Location_District"]])
##  [1] 2D      3D      4D      5D      1D      7D      6D      UNKNOWN <NA>   
## [10]        
## Levels:  1D 2D 3D 4D 5D 6D 7D UNKNOWN
# Row count for each arrest-location district level.
table(df_wm[["Arrest_Location_District"]])
## 
##              1D      2D      3D      4D      5D      6D      7D UNKNOWN 
##      22    1858    3056    2556    2817    1231     437     260      29
# Distinct offense-location district values in the White-male subset.
unique(df_wm[["Offense_Location_District"]])
##  [1] 2D      3D      4D      5D      1D      7D      6D      #N/A    UNKNOWN
## [10] Unk    
## Levels: #N/A 1D 2D 3D 4D 5D 6D 7D Unk UNKNOWN
# Row count for each offense-location district level.
table(df_wm[["Offense_Location_District"]])
## 
##    #N/A      1D      2D      3D      4D      5D      6D      7D     Unk UNKNOWN 
##      12    2044    3103    2532    2720    1195     413     238      16      10
# White-male arrest counts per arrest district and per offense district,
# largest first. The *2 tables keep only the coded districts 1D-7D; the
# unknown / blank / #N/A / NA buckets are few and are left out of the plots.
# They are removed by name rather than by taking the first seven rows, so the
# result does not depend on those buckets happening to have the smallest counts.
wm_by_ALD <- df_wm %>%
  group_by(Arrest_Location_District) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::arrange(desc(Total))
wm_by_ALD2 <- dplyr::filter(
  wm_by_ALD,
  Arrest_Location_District %in% paste0(1:7, "D")
)

wm_by_OLD <- df_wm %>%
  group_by(Offense_Location_District) %>%
  dplyr::summarise(Total = n()) %>%
  dplyr::arrange(desc(Total))
wm_by_OLD2 <- dplyr::filter(
  wm_by_OLD,
  Offense_Location_District %in% paste0(1:7, "D")
)

# Arrest counts per arrest-location district, tallest bar first.
ggplot(wm_by_ALD2, aes(x = reorder(Arrest_Location_District, -Total), y = Total)) +
  geom_col() +
  labs(
    title = "Crimes by Arrest Location District",
    x = "Location District",
    y = "Total Crimes"
  )

# Arrest counts per offense-location district, tallest bar first.
ggplot(wm_by_OLD2, aes(x = reorder(Offense_Location_District, -Total), y = Total)) +
  geom_col() +
  labs(
    title = "Crimes by Offense Location District",
    x = "Location District",
    y = "Total Crimes"
  )

# top 5 crimes in each district
#ALD_dc_top7 <- wm_by_ALD$Arrest_Location_District[1:5]

#ALD_top7_dc <- subset(df_wm, Arrest_Location_District %in% wm_by_ALD$Arrest_Location_District[1:5])
#ALD_top7_dc$Arrest_Location_District <- factor(ALD_top7_dc$Arrest_Location_District)

#ggplot(ALD_top7_dc, aes(Arrest_Category, fill = Arrest_Location_District)) + 
      #geom_bar(position = "dodge") + 
      #ggtitle("Crimes by District Police HeadQuarters - Top 5") + 
      #xlab("Police HQ") + 
      #ylab("Total Crimes") 

# Most frequent arrest category within each ARREST location district.
# slice_max(with_ties = FALSE) keeps exactly one row per district, so a tie for
# first place cannot add an extra row. The result stays grouped by district
# and is listed largest first.
ALD_by_crime <- df_wm %>%
  group_by(Arrest_Location_District, Arrest_Category) %>%
  dplyr::summarise(Total = n(), .groups = "drop_last") %>%
  slice_max(Total, n = 1, with_ties = FALSE) %>%
  arrange(desc(Total))

# Keep only the coded districts 1D-7D for plotting. Selecting them by name,
# instead of taking rows 1:7, cannot let an unknown/blank/NA bucket displace
# a real district.
ALD_by_crime1 <- ALD_by_crime %>%
  ungroup() %>%
  dplyr::filter(Arrest_Location_District %in% paste0(1:7, "D"))

#dc_by_crime <- as.data.frame(dc_by_crime)
#dc_by_crime$Dc_Dist <- factor(dc_by_crime$Dc_Dist)
#dc_by_crime$Text_General_Code <- factor(dc_by_crime$Text_General_Code)

# One bar per arrest-location district, coloured by its leading arrest category.
ggplot(
  ALD_by_crime1,
  aes(x = Arrest_Location_District, y = Total, fill = Arrest_Category)
) +
  geom_col() +
  labs(
    title = "Top Crime by Arrest Location District",
    x = "Location District",
    y = "Total"
  )

# Most frequent arrest category within each OFFENSE location district.
# slice_max(with_ties = FALSE) keeps exactly one row per district, so a tie for
# first place cannot add an extra row. The result stays grouped by district
# and is listed largest first.
OLD_by_crime <- df_wm %>%
  group_by(Offense_Location_District, Arrest_Category) %>%
  dplyr::summarise(Total = n(), .groups = "drop_last") %>%
  slice_max(Total, n = 1, with_ties = FALSE) %>%
  arrange(desc(Total))

# Keep only the coded districts 1D-7D for plotting. Selecting them by name,
# instead of taking rows 1:7, cannot let an unknown/#N/A bucket displace a
# real district.
OLD_by_crime1 <- OLD_by_crime %>%
  ungroup() %>%
  dplyr::filter(Offense_Location_District %in% paste0(1:7, "D"))

#dc_by_crime <- as.data.frame(dc_by_crime)
#dc_by_crime$Dc_Dist <- factor(dc_by_crime$Dc_Dist)
#dc_by_crime$Text_General_Code <- factor(dc_by_crime$Text_General_Code)

# One bar per offense-location district, coloured by its leading arrest category.
ggplot(
  OLD_by_crime1,
  aes(x = Offense_Location_District, y = Total, fill = Arrest_Category)
) +
  geom_col() +
  labs(
    title = "Top Crime by Offense Location District",
    x = "Location District",
    y = "Total"
  )